# -*- coding: utf-8 -*-

import torch
from modelscope import snapshot_download
from transformers import AutoTokenizer, AutoModel
# Download the ChatGLM3-6B weights from ModelScope and load the matching tokenizer.
model_dir = snapshot_download("ZhipuAI/chatglm3-6b", revision="v1.0.0")
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)

# Sanity-check the PyTorch / CUDA environment before loading the model.
print(torch.__version__)
print(torch.cuda.is_available())
print(torch.version.cuda)

# Load ChatGLM3-6B for inference: half precision (fp16) on GPU if available,
# otherwise float32 on CPU (fp16 matmul is not supported for CPU inference).
model = AutoModel.from_pretrained(model_dir, trust_remote_code=True)
if torch.cuda.is_available():
    model = model.half().cuda()
else:
    model = model.float().cpu()
model = model.eval()
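
# Optional: a rough sketch of loading an int4-quantized variant instead, assuming
# this ChatGLM3 checkpoint exposes the quantize() helper documented for earlier
# ChatGLM releases; quantized inference requires a CUDA device.
# model = AutoModel.from_pretrained(model_dir, trust_remote_code=True).quantize(4).cuda().eval()
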
response, history = model.chat(tokenizer, "你好", history=[])  # "Hello"
print(response)
response, history = model.chat(tokenizer, "晚上睡不着应该怎么办", history=history)  # "What should I do if I can't sleep at night?"
print(response)
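
# A minimal streaming sketch, assuming this checkpoint ships the stream_chat()
# generator found in ChatGLM model implementations; each yield carries the
# cumulative response so far, so it can be re-rendered as tokens arrive.
# for response, history in model.stream_chat(tokenizer, "你好", history=[]):  # "Hello"
#     print(response)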